# Purpose:
# Replicate the permutation-based chi-square analysis testing whether
# cognitive-process category differs by experience group.

library(readxl)
library(dplyr)
library(stringr)

# ---- Configuration ----
# Workbook path (the literal below reads as a placeholder to be edited),
# the sheet to read, and the path the text report is written to.
input_file <- "data/change to data file name"
sheet_name <- "fixed"
output_file <- "outputs/06_tc_group_permutation.txt"

# ---- Load ----
df <- read_excel(path = input_file, sheet = sheet_name)

# ---- Validate ----
# Abort early when any column the analysis relies on is absent.
required_cols <- c("tier", "text", "correct ID", "ניסיון מקצועי")
missing_cols <- required_cols[!(required_cols %in% names(df))]
if (length(missing_cols) > 0) {
  stop("Missing required columns: ", toString(missing_cols))
}

# Build the analysis table:
#   1. keep rows whose tier is "tc" (case-insensitive);
#   2. derive the lower-cased code, the experience group and the
#      participant id;
#   3. keep rows with one of the six recognised codes and with both group
#      and participant id present.
df_tc <- df %>%
  filter(str_to_lower(tier) == "tc") %>%
  mutate(
    tc = str_to_lower(text),
    # A missing experience value matches neither arm and stays NA.
    group = case_when(
      `ניסיון מקצועי` == "סטודנט תואר ראשון" ~ "novice",
      `ניסיון מקצועי` != "סטודנט תואר ראשון" ~ "expert"
    ),
    participant_id = `correct ID`
  ) %>%
  filter(
    tc %in% c("in", "rt", "sm", "ws", "rp", "empty"),
    !is.na(group),
    !is.na(participant_id)
  )

# Observed code-by-group contingency table and the chi-square statistic
# that chisq.test() reports for it.
obs_table <- table(df_tc[["tc"]], df_tc[["group"]])
obs_chi <- unname(chisq.test(obs_table)[["statistic"]])

# Participant-level permutation test.
#
# Experience group is an attribute of the participant, so under the null
# hypothesis the exchangeable units are participants, not individual rows.
# Shuffling labels *within* a participant leaves them unchanged whenever a
# participant has a single group, which makes every permuted statistic
# equal to the observed one and forces the p-value to 1. Instead, the group
# labels are shuffled across participants and mapped back onto each
# participant's rows. This keeps every participant's set of codes intact
# and preserves the number of participants per group.
set.seed(42)
B <- 10000
perm_chis <- numeric(B)

# One row per participant with that participant's group label.
participant_groups <- distinct(df_tc, participant_id, group)
if (anyDuplicated(participant_groups$participant_id) > 0) {
  stop(
    "Each participant must belong to exactly one experience group ",
    "for a participant-level permutation.",
    call. = FALSE
  )
}

# Row -> participant lookup; invariant across iterations, so computed once.
row_participant <- match(
  df_tc$participant_id,
  participant_groups$participant_id
)

for (i in seq_len(B)) {
  # Shuffle by index so the result is always a reordering of the labels.
  shuffled_groups <- participant_groups$group[
    sample.int(nrow(participant_groups))
  ]
  perm_table <- table(df_tc$tc, shuffled_groups[row_participant])
  perm_chis[i] <- as.numeric(chisq.test(perm_table)$statistic)
}

# Proportion of permuted statistics at least as extreme as the observed one.
# NOTE(review): this estimator can report exactly 0; (1 + count) / (B + 1)
# is the usual alternative if that matters for reporting.
perm_p <- mean(perm_chis >= obs_chi)

# Write the text report.
#
# The output directory is created first so that a missing folder does not
# make sink() fail after the analysis has already been computed. The sink
# is released in a `finally` handler so console output is restored even if
# one of the write calls below errors.
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
sink(output_file)
tryCatch(
  {
    cat("Experience group by cognitive-process permutation chi-square\n\n")
    cat("Observed chi-square: ", round(obs_chi, 4), "\n", sep = "")
    cat("Permutation p-value: ", round(perm_p, 4), "\n", sep = "")
    cat("Iterations: ", B, "\n\n", sep = "")
    cat("Observed contingency table:\n")
    print(obs_table)
  },
  finally = sink()
)
